Roey Angel 2021-05-21
This analysis tests the effect of library read-depth distribution on the community composition. It then performs various read-depth normalisation methods on the DADA2 zOTU-based dataset, for determining the optimal strategy to handle the bias of uneven read distribution.
set.seed(1000)
min_lib_size <- 5000
metadata_path <- "./"
data_path <- "./DADA2_pseudo/"
Metadata_table <- "TeaTime_joint_Bacteria_metadata.csv"
Seq_table <- "DADA2.seqtab_nochim.tsv"Read abundance table, taxonomic classification and metadata into a phyloseq object. Also remove sequences detected as contaminants in 03_Decontamination.html.
# read OTU mat from data file
Raw_data <- read_tsv(paste0(data_path, Seq_table),
trim_ws = TRUE)
contaminant_seqs <- read_csv(paste0(data_path, "decontam_contaminants.csv"),
trim_ws = TRUE,
col_names = FALSE)
Raw_data %<>% # remove contaminant OTUs.
# .[, -grep("CTRL", colnames(.))] %>% # remove ext. cont.
.[!(Raw_data$`#OTU` %in% contaminant_seqs$X1), ]
Raw_data[, 2:ncol(Raw_data)] %>%
t() %>%
as.data.frame() -> abundance_mat # convert to abundance matrix
colnames(abundance_mat) <- pull(Raw_data, "#OTU") # add sequence names
# Read metadata file
read_csv(paste0(metadata_path, Metadata_table),
trim_ws = TRUE) %>%
mutate_at(
c(
"Workshop",
"Season",
"Run",
"Type",
"Sample type",
"Field",
"Replicate",
"Control",
"Gene"
),
funs(factor(.))
) %>%
mutate_at(c("Extr. Date", "PCR products_16S_send for seq"), ~as.Date(., "%d.%m.%Y")) ->
Metadata
Metadata$Season %<>% fct_relevel("Winter", "Spring", "Summer", "Autumn")
Metadata$Read1_file <- str_replace(Metadata$Read1_file, "(.*)_L001_R1_001.fastq.gz|\\.1\\.fastq.gz", "\\1")
Metadata <- Metadata[Metadata$Read1_file %in% rownames(abundance_mat), ] # remove metadata rows if the samples did not go through qual processing
# Order abundance_mat samples according to the metadata
sample_order <- match(rownames(abundance_mat), Metadata$Read1_file)
abundance_mat %<>%
rownames_to_column('sample_name') %>%
arrange(., sample_order) %>%
column_to_rownames('sample_name') # needed for phyloseq
Metadata$Library.size <- rowSums(abundance_mat)
Metadata <- data.frame(row.names = Metadata$Read1_file, Metadata)
# generate phyloseq object
Ps_obj <- phyloseq(otu_table(abundance_mat, taxa_are_rows = FALSE),
sample_data(Metadata)
)
Ps_obj <- filter_taxa(Ps_obj, function(x) sum(x) > 0, TRUE) # remove 0 abundance taxa
Ps_obj <- subset_samples(Ps_obj, sample_sums(Ps_obj) > 0) # remove 0 abundance samples
# Remove mock and control samples
Ps_obj <- subset_samples(Ps_obj, Type != "Control" & Type != "Mock")
# Create a grouping variable for merging
sample_data(Ps_obj) %<>%
as(., "data.frame") %>%
# get_variable(., c("Sample.type", "Field", "Season", "Replicate")) %>%
unite(., "Description", c("Sample.type", "Field", "Season", "Replicate"), remove = FALSE)
sample_data(Ps_obj)$Description %<>% as_factor(.)
# merged_Ps_obj <- merge_samples(Ps_obj, "Description")
# merged_SD <- merge_samples(sample_data(Ps_obj), "Description")
Ps_obj_merged <- MergeSamples(Ps_obj, grouping_name = "Description")First let’s look at the count data distribution
PlotLibDist(Ps_obj)get_variable(Ps_obj) %>%
remove_rownames %>%
dplyr::select(Sample.name, Library.size) %>%
as(., "data.frame") %>%
format_engr() %>%
kable(., col.names = c("Sample name", "Library size")) %>%
kable_styling(bootstrap_options = c("striped", "hover", "condensed", "responsive"), full_width = F)| Sample name | Library size |
|---|---|
| GK1 | 28.94 × 103 |
| GK1 | 50.44 × 103 |
| GK2 | 26.81 × 103 |
| GK2 | 71.08 × 103 |
| GK3 | 32.51 × 103 |
| GK3 | 32.52 × 103 |
| GK4 | 32.20 × 103 |
| GK4 | 35.47 × 103 |
| RK1 | 32.99 × 103 |
| RK1 | 25.69 × 103 |
| RK2 | 32.45 × 103 |
| RK2 | 45.07 × 103 |
| RK3 | 33.51 × 103 |
| RK3 | 16.63 × 103 |
| RK4 | 34.32 × 103 |
| RK4 | 32.02 × 103 |
| GB1 | 35.71 × 103 |
| GB1 | 49.37 × 103 |
| GB2 | 38.45 × 103 |
| GB2 | 22.98 × 103 |
| GB3 | 36.92 × 103 |
| GB3 | 30.32 × 103 |
| GB4 | 34.06 × 103 |
| GB4 | 25.20 × 103 |
| RB1 | 30.92 × 103 |
| RB1 | 36.23 × 103 |
| RB2 | 36.95 × 103 |
| RB2 | 23.29 × 103 |
| RB3 | 36.15 × 103 |
| RB3 | 29.30 × 103 |
| RB4 | 39.52 × 103 |
| RB4 | 19.58 × 103 |
| GA1 | 30.44 × 103 |
| GA1 | 23.58 × 103 |
| GA2 | 39.63 × 103 |
| GA2 | 19.50 × 103 |
| GA3 | 36.05 × 103 |
| GA3 | 12.77 × 103 |
| GA4 | 34.87 × 103 |
| GA4 | 19.85 × 103 |
| RA1 | 31.64 × 103 |
| RA1 | 33.09 × 103 |
| RA2 | 39.14 × 103 |
| RA2 | 14.18 × 103 |
| RA3 | 37.75 × 103 |
| RA3 | 19.79 × 103 |
| RA4 | 38.75 × 103 |
| RA4 | 21.64 × 103 |
| K1 | 19.92 × 103 |
| K1 | 24.69 × 103 |
| B1 | 15.68 × 103 |
| B1 | 20.69 × 103 |
| A1 | 34.63 × 103 |
| A1 | 28.78 × 103 |
| A1-1-2 | 34.10 × 103 |
| B1-1-2 | 24.93 × 103 |
| K1-1-2 | 27.59 × 103 |
| RF | 2.080 × 103 |
| RF | 8.196 × 103 |
| GF | 2.980 × 103 |
| GF | 14.27 × 103 |
| GK5 | 45.22 × 103 |
| GK7 | 34.08 × 103 |
| GK8 | 32.18 × 103 |
| RK5 | 35.68 × 103 |
| RK6 | 46.20 × 103 |
| RK7 | 33.14 × 103 |
| RK8 | 31.62 × 103 |
| GB5 | 30.27 × 103 |
| GB6 | 33.33 × 103 |
| GB7 | 32.68 × 103 |
| GB8 | 37.00 × 103 |
| RB5 | 27.74 × 103 |
| RB6 | 26.78 × 103 |
| RB7 | 32.31 × 103 |
| RB8 | 38.73 × 103 |
| GA5 | 33.86 × 103 |
| GA6 | 37.63 × 103 |
| GA7 | 35.71 × 103 |
| GA8 | 41.33 × 103 |
| RA5 | 32.83 × 103 |
| RA6 | 31.90 × 103 |
| RA7 | 37.63 × 103 |
| RA8 | 37.33 × 103 |
| K1-2-1 | 27.38 × 103 |
| B1-2-1 | 34.18 × 103 |
| A1-2-1 | 35.75 × 103 |
| K1-2-2 | 32.31 × 103 |
| B1-2-2 | 33.40 × 103 |
| A1-2-2 | 32.97 × 103 |
| GK10 | 41.93 × 103 |
| GK11 | 44.16 × 103 |
| RK9 | 48.02 × 103 |
| RK10 | 43.50 × 103 |
| RK12 | 40.76 × 103 |
| GB9 | 38.22 × 103 |
| GB10 | 38.48 × 103 |
| GB11 | 33.14 × 103 |
| GB12 | 31.39 × 103 |
| RB9 | 46.35 × 103 |
| RB10 | 44.48 × 103 |
| RB11 | 43.94 × 103 |
| RB12 | 6.680 × 103 |
| GA9 | 36.23 × 103 |
| GA10 | 35.75 × 103 |
| GA11 | 38.70 × 103 |
| GA12 | 36.52 × 103 |
| RA9 | 36.78 × 103 |
| RA10 | 36.15 × 103 |
| RA11 | 37.26 × 103 |
| RA12 | 53.28 × 103 |
| K1-3-1 | 31.98 × 103 |
| K1-3-2 | 34.79 × 103 |
| K1-3-3 | 47.26 × 103 |
| B1-3-1 | 32.99 × 103 |
| B1-3-2 | 37.53 × 103 |
| B1-3-3 | 36.56 × 103 |
| A1-3-1 | 43.96 × 103 |
| A1-3-2 | 48.38 × 103 |
| A1-3-3 | 27.25 × 103 |
| RK13 | 47.76 × 103 |
| RK14 | 47.93 × 103 |
| RK15 | 32.01 × 103 |
| RK16 | 51.30 × 103 |
| GK13 | 58.42 × 103 |
| GK14 | 50.94 × 103 |
| GK15 | 59.82 × 103 |
| GK16 | 52.24 × 103 |
| RB13 | 53.48 × 103 |
| RB14 | 50.98 × 103 |
| RB15 | 34.69 × 103 |
| RB16 | 56.18 × 103 |
| GB13 | 42.41 × 103 |
| GB14 | 35.62 × 103 |
| GB15 | 41.01 × 103 |
| GB16 | 58.61 × 103 |
| RA13 | 49.54 × 103 |
| RA14 | 60.50 × 103 |
| RA15 | 50.59 × 103 |
| RA16 | 56.54 × 103 |
| GA13 | 39.18 × 103 |
| GA14 | 37.95 × 103 |
| GA15 | 46.26 × 103 |
| GA16 | 40.85 × 103 |
| soil 6-1 | 36.19 × 103 |
| soil 6-2 | 21.35 × 103 |
| soil 6-3 | 24.06 × 103 |
| soil 5-1 | 23.86 × 103 |
| soil 5-2 | 27.95 × 103 |
| soil 5-3 | 28.37 × 103 |
| soil 4-1 | 1.677 × 103 |
| soil 4-2 | 19.94 × 103 |
| soil 4-3 | 25.92 × 103 |
The figure and table indicate only a small deviation in the number of reads per samples.
(mod1 <- adonis(vegdist(otu_table(Ps_obj), method = "bray") ~ Library.size,
data = as(sample_data(Ps_obj), "data.frame"),
permutations = 999
))##
## Call:
## adonis(formula = vegdist(otu_table(Ps_obj), method = "bray") ~ Library.size, data = as(sample_data(Ps_obj), "data.frame"), permutations = 999)
##
## Permutation: free
## Number of permutations: 999
##
## Terms added sequentially (first to last)
##
## Df SumsOfSqs MeanSqs F.Model R2 Pr(>F)
## Library.size 1 2.795 2.79458 8.4201 0.05282 0.001 ***
## Residuals 151 50.116 0.33189 0.94718
## Total 152 52.911 1.00000
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
PlotReadHist(as(otu_table(Ps_obj), "matrix"))notAllZero <- (rowSums(t(otu_table(Ps_obj))) > 0)
meanSdPlot(as.matrix(log2(t(otu_table(Ps_obj))[notAllZero, ] + 1))) Nevertheless, modelling library size shows a significant effect of read depth on the community structure (explaining only 5%). The reads histogram shows as expected a highly sparse and skewed sequence matrix. The mean vs SD also shows as expected large dependency of SD on the mean reads of a sequence across all samples.
# subsample libraries from 1000 to max(sample_sums(Ps_obj)) and test
for (i in seq(1000, max(sample_sums(Ps_obj)), 1000)) {
min_seqs <<- i
Ps_obj_pruned <- subset_samples(Ps_obj, sample_sums(Ps_obj) > i)
mod <-
adonis2(vegdist(otu_table(Ps_obj_pruned), method = "bray") ~ sample_sums(Ps_obj_pruned), # I use adonis2 because it gives a data.frame
data = get_variable(Ps_obj_pruned),
permutations = 999
)
Pval <- tidy(mod)$p.value[1]
if (Pval > 0.05)
break()
}Only by subsetting the samples to a minimum library size of 3.9 × 104 sequences do we get independence from library size but this forces us to drop 111 out of 153 samples.
Let’s see the effect of this
Ps_obj_pruned_harsh <- subset_samples(Ps_obj_merged, sample_sums(Ps_obj) > i)
adonis(
vegdist(otu_table(Ps_obj_pruned_harsh), method = "bray") ~ Library.size,
data =
get_variable(Ps_obj_pruned_harsh),
permutations = 999
)##
## Call:
## adonis(formula = vegdist(otu_table(Ps_obj_pruned_harsh), method = "bray") ~ Library.size, data = get_variable(Ps_obj_pruned_harsh), permutations = 999)
##
## Permutation: free
## Number of permutations: 999
##
## Terms added sequentially (first to last)
##
## Df SumsOfSqs MeanSqs F.Model R2 Pr(>F)
## Library.size 1 1.2659 1.26593 3.8539 0.14351 0.001 ***
## Residuals 23 7.5551 0.32848 0.85649
## Total 24 8.8210 1.00000
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
adonis(
vegdist(otu_table(Ps_obj_pruned_harsh), method = "bray") ~ Field + Sample.type * Season,
data =
get_variable(Ps_obj_pruned_harsh),
permutations = 999
)##
## Call:
## adonis(formula = vegdist(otu_table(Ps_obj_pruned_harsh), method = "bray") ~ Field + Sample.type * Season, data = get_variable(Ps_obj_pruned_harsh), permutations = 999)
##
## Permutation: free
## Number of permutations: 999
##
## Terms added sequentially (first to last)
##
## Df SumsOfSqs MeanSqs F.Model R2 Pr(>F)
## Field 3 1.1018 0.36726 1.8846 0.12490 0.005 **
## Sample.type 2 3.1843 1.59213 8.1701 0.36099 0.001 ***
## Season 3 1.3995 0.46651 2.3939 0.15866 0.001 ***
## Sample.type:Season 2 0.4072 0.20359 1.0447 0.04616 0.417
## Residuals 14 2.7282 0.19487 0.30929
## Total 24 8.8210 1.00000
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
PlotReadHist(as(otu_table(Ps_obj_pruned_harsh), "matrix"))notAllZero <- (rowSums(t(otu_table(Ps_obj_pruned_harsh))) > 0)
meanSdPlot(as.matrix(log2(t(otu_table(Ps_obj_pruned_harsh))[notAllZero, ] + 1)))Pruned_ord <- ordinate(Ps_obj_pruned_harsh, "CAP", "bray", formula = Ps_obj_pruned_harsh ~ Field + Sample.type * Season)
plot_ordination(Ps_obj_pruned_harsh, Pruned_ord, type = "samples", color = "Sample.type", label = "Sample.name", shape = "Field") +
facet_wrap(~ Season) #+ # geom_point(size = 5) +
# geom_text(size = 5) Instead let’s drop all samples below 5000 reads and do try some correction methods for library depths
Ps_obj_pruned_min <- subset_samples(Ps_obj_merged, sample_sums(Ps_obj_merged) > min_lib_size)
Ps_obj_pruned_min <- filter_taxa(Ps_obj_pruned_min, function(x) sum(x) > 0, TRUE)Ps_obj_pruned_rared <-
rarefy_even_depth(
Ps_obj_pruned_min,
sample.size = min(sample_sums(Ps_obj_pruned_min)),
rngseed = FALSE,
replace = FALSE
)
sample_data(Ps_obj_pruned_rared)$Library.size <- sample_sums(Ps_obj_pruned_rared)
# Ps_obj_pruned_rared <- Ps_obj_pruned_min
# Ps_obj_pruned_min %>%
# otu_table() %>%
# rowSums() %>%
# min() %>%
# rrarefy(otu_table(Ps_obj_pruned_min), .) ->
# otu_table(Ps_obj_pruned_rared)
adonis(
vegdist(otu_table(Ps_obj_pruned_rared), method = "bray") ~ Field + Sample.type * Season,
data =
get_variable(Ps_obj_pruned_rared),
permutations = 999
)##
## Call:
## adonis(formula = vegdist(otu_table(Ps_obj_pruned_rared), method = "bray") ~ Field + Sample.type * Season, data = get_variable(Ps_obj_pruned_rared), permutations = 999)
##
## Permutation: free
## Number of permutations: 999
##
## Terms added sequentially (first to last)
##
## Df SumsOfSqs MeanSqs F.Model R2 Pr(>F)
## Field 3 1.944 0.6481 4.208 0.04457 0.001 ***
## Sample.type 2 12.310 6.1552 39.963 0.28219 0.001 ***
## Season 3 7.903 2.6345 17.104 0.18117 0.001 ***
## Sample.type:Season 6 4.833 0.8054 5.229 0.11077 0.001 ***
## Residuals 108 16.635 0.1540 0.38130
## Total 122 43.625 1.00000
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
PlotLibDist(Ps_obj_pruned_rared)PlotReadHist(as(otu_table(Ps_obj_pruned_rared), "matrix"))notAllZero <- (rowSums(t(otu_table(Ps_obj_pruned_rared))) > 0)
meanSdPlot(as.matrix(log2(t(otu_table(Ps_obj_pruned_rared))[notAllZero, ] + 1)))Rared_ord <- ordinate(Ps_obj_pruned_rared, "CAP", "bray", formula = Ps_obj_pruned_rared ~ Field + Sample.type * Season)
plot_ordination(Ps_obj_pruned_rared, Rared_ord, type = "samples", color = "Sample.type", label = "Sample.name", shape = "Field") +
facet_wrap(~ Season) #+ # geom_point(size = 5) Ps_obj_pruned_GMPR <- Ps_obj_pruned_min
Ps_obj_pruned_min %>%
otu_table(.) %>%
t() %>%
as(., "matrix") %>%
GMPR() ->
GMPR_factors## Begin GMPR size factor calculation ...
## 50
## 100
## Completed!
## Please watch for the samples with limited sharing with other samples based on NSS! They may be outliers!
Ps_obj_pruned_min %>%
otu_table(.) %>%
t() %*% diag(1 / GMPR_factors$gmpr) %>%
t() %>%
as.data.frame(., row.names = sample_names(Ps_obj_pruned_min)) %>%
otu_table(., taxa_are_rows = FALSE) ->
otu_table(Ps_obj_pruned_GMPR)
sample_data(Ps_obj_pruned_GMPR)$Library.size <- sample_sums(Ps_obj_pruned_GMPR)
adonis(
vegdist(otu_table(Ps_obj_pruned_GMPR), method = "bray") ~ Library.size,
data =
get_variable(Ps_obj_pruned_GMPR),
permutations = 999
)##
## Call:
## adonis(formula = vegdist(otu_table(Ps_obj_pruned_GMPR), method = "bray") ~ Library.size, data = get_variable(Ps_obj_pruned_GMPR), permutations = 999)
##
## Permutation: free
## Number of permutations: 999
##
## Terms added sequentially (first to last)
##
## Df SumsOfSqs MeanSqs F.Model R2 Pr(>F)
## Library.size 1 2.092 2.0921 6.1696 0.04852 0.001 ***
## Residuals 121 41.031 0.3391 0.95148
## Total 122 43.123 1.00000
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
adonis(
vegdist(otu_table(Ps_obj_pruned_GMPR), method = "bray") ~ Field + Sample.type * Season,
data =
get_variable(Ps_obj_pruned_GMPR),
permutations = 999
)##
## Call:
## adonis(formula = vegdist(otu_table(Ps_obj_pruned_GMPR), method = "bray") ~ Field + Sample.type * Season, data = get_variable(Ps_obj_pruned_GMPR), permutations = 999)
##
## Permutation: free
## Number of permutations: 999
##
## Terms added sequentially (first to last)
##
## Df SumsOfSqs MeanSqs F.Model R2 Pr(>F)
## Field 3 1.973 0.6575 4.532 0.04574 0.001 ***
## Sample.type 2 12.528 6.2642 43.172 0.29052 0.001 ***
## Season 3 8.126 2.7086 18.667 0.18843 0.001 ***
## Sample.type:Season 6 4.826 0.8043 5.543 0.11190 0.001 ***
## Residuals 108 15.671 0.1451 0.36340
## Total 122 43.123 1.00000
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
PlotLibDist(Ps_obj_pruned_GMPR)PlotReadHist(as(otu_table(Ps_obj_pruned_GMPR), "matrix"))notAllZero <- (rowSums(t(otu_table(Ps_obj_pruned_GMPR))) > 0)
meanSdPlot(as.matrix(log2(t(otu_table(Ps_obj_pruned_GMPR))[notAllZero, ] + 1)))GMPR_ord <- ordinate(Ps_obj_pruned_GMPR, "CAP", "bray", formula = Ps_obj_pruned_GMPR ~ Field + Sample.type * Season)
plot_ordination(Ps_obj_pruned_GMPR, GMPR_ord, type = "samples", color = "Sample.type", label = "Sample.name", shape = "Field") +
facet_wrap(~ Season)# Cumulative sum scaling normalization
Ps_obj_pruned_CSS <- Ps_obj_pruned_min
Ps_obj_pruned_CSS %>%
otu_table(.) %>%
t() %>%
as(., "matrix") %>%
newMRexperiment(.) ->
mr_obj
p <- cumNormStatFast(mr_obj)
cumNormMat(mr_obj, p = p) %>%
otu_table(., taxa_are_rows = TRUE) ->
otu_table(Ps_obj_pruned_CSS)
# Did any OTU produce Na?
Ps_obj_pruned_CSS %>%
otu_table() %>%
as(., "matrix") %>%
t(.) %>%
apply(., 2, function(x) !any(is.na(x))) %>%
unlist() ->
OTUs2keep
Ps_obj_pruned_CSS <- prune_taxa(OTUs2keep, Ps_obj_pruned_CSS)
sample_data(Ps_obj_pruned_CSS)$Library.size <- sample_sums(Ps_obj_pruned_CSS)
# adonis(
# vegdist(otu_table(Ps_obj_pruned_CS), method = "bray") ~ Library.size,
# data =
# get_variable(Ps_obj_pruned_CS), "data.frame"),
# permutations = 999
# )
# adonis(
# vegdist(otu_table(Ps_obj_pruned_CS), method = "bray") ~ Field + Sample.type * Season,
# data =
# get_variable(Ps_obj_pruned_CS), "data.frame"),
# permutations = 999
# )
PlotLibDist(Ps_obj_pruned_CSS)PlotReadHist(as(otu_table(Ps_obj_pruned_CSS), "matrix"))notAllZero <- (rowSums(t(otu_table(Ps_obj_pruned_CSS))) > 0)
meanSdPlot(as(log2(t(otu_table(Ps_obj_pruned_CSS))[notAllZero, ] + 1), "matrix"))CSS_ord <- ordinate(Ps_obj_pruned_CSS, "CAP", "bray", formula = Ps_obj_pruned_CS ~ Field + Sample.type * Season)
plot_ordination(Ps_obj_pruned_CSS, CSS_ord, type = "samples", color = "Sample.type", label = "Sample.name", shape = "Field") +
facet_wrap(~ Season)Ps_obj_pruned_min %>%
otu_table(.) %>%
as(., "matrix") %>%
rowSums() %>%
median() ->
total
standf = function(x, t=total) round(t * (x / sum(x)))
Ps_obj_pruned_median <- transform_sample_counts(Ps_obj_pruned_min, standf) # Standardize abundances to median sequencing depth
sample_data(Ps_obj_pruned_median)$Library.size <- sample_sums(Ps_obj_pruned_median)
adonis(
vegdist(otu_table(Ps_obj_pruned_median), method = "bray") ~ Library.size,
data =
get_variable(Ps_obj_pruned_median),
permutations = 999
)##
## Call:
## adonis(formula = vegdist(otu_table(Ps_obj_pruned_median), method = "bray") ~ Library.size, data = get_variable(Ps_obj_pruned_median), permutations = 999)
##
## Permutation: free
## Number of permutations: 999
##
## Terms added sequentially (first to last)
##
## Df SumsOfSqs MeanSqs F.Model R2 Pr(>F)
## Library.size 1 0.956 0.95566 2.7651 0.02234 0.005 **
## Residuals 121 41.820 0.34562 0.97766
## Total 122 42.775 1.00000
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
adonis(
vegdist(otu_table(Ps_obj_pruned_median), method = "bray") ~ Field + Sample.type * Season,
data =
get_variable(Ps_obj_pruned_median),
permutations = 999
)##
## Call:
## adonis(formula = vegdist(otu_table(Ps_obj_pruned_median), method = "bray") ~ Field + Sample.type * Season, data = get_variable(Ps_obj_pruned_median), permutations = 999)
##
## Permutation: free
## Number of permutations: 999
##
## Terms added sequentially (first to last)
##
## Df SumsOfSqs MeanSqs F.Model R2 Pr(>F)
## Field 3 1.927 0.6423 4.584 0.04505 0.001 ***
## Sample.type 2 12.861 6.4306 45.889 0.30067 0.001 ***
## Season 3 8.066 2.6885 19.185 0.18856 0.001 ***
## Sample.type:Season 6 4.787 0.7978 5.693 0.11191 0.001 ***
## Residuals 108 15.135 0.1401 0.35382
## Total 122 42.775 1.00000
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
PlotLibDist(Ps_obj_pruned_median)PlotReadHist(as(otu_table(Ps_obj_pruned_median), "matrix"))notAllZero <- (rowSums(t(otu_table(Ps_obj_pruned_median))) > 0)
meanSdPlot(as(log2(t(otu_table(Ps_obj_pruned_median))[notAllZero, ] + 1), "matrix"))Median_ord <- ordinate(Ps_obj_pruned_median, "CAP", "bray", formula = Ps_obj_pruned_median ~ Field + Sample.type * Season)
plot_ordination(Ps_obj_pruned_median, Median_ord, type = "samples", color = "Sample.type", label = "Sample.name", shape = "Field") +
facet_wrap(~ Season)Ps_obj_pruned_log <- transform_sample_counts(Ps_obj_pruned_min, function(x) log(1 + x))
# Ps_obj_pruned_rlog <- Ps_obj_pruned_min
# Ps_obj_pruned_min %>%
# transform_sample_counts(., function(x) (1 + x)) %>% # add pseudocount
# phyloseq_to_deseq2(., ~ Spill) %>%
# rlog(., blind = TRUE , fitType = "parametric") %>%
# assay() %>%
# otu_table(, taxa_are_rows = TRUE) ->
# otu_table(Ps_obj_pruned_rlog)
sample_data(Ps_obj_pruned_log)$Library.size <- sample_sums(Ps_obj_pruned_log)
adonis(
vegdist(otu_table(Ps_obj_pruned_log), method = "bray") ~ Library.size,
data =
get_variable(Ps_obj_pruned_log),
permutations = 999
)##
## Call:
## adonis(formula = vegdist(otu_table(Ps_obj_pruned_log), method = "bray") ~ Library.size, data = get_variable(Ps_obj_pruned_log), permutations = 999)
##
## Permutation: free
## Number of permutations: 999
##
## Terms added sequentially (first to last)
##
## Df SumsOfSqs MeanSqs F.Model R2 Pr(>F)
## Library.size 1 8.263 8.2626 37.894 0.23849 0.001 ***
## Residuals 121 26.383 0.2180 0.76151
## Total 122 34.646 1.00000
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
adonis(
vegdist(otu_table(Ps_obj_pruned_log), method = "bray") ~ Field + Sample.type * Season,
data =
get_variable(Ps_obj_pruned_log),
permutations = 999
)##
## Call:
## adonis(formula = vegdist(otu_table(Ps_obj_pruned_log), method = "bray") ~ Field + Sample.type * Season, data = get_variable(Ps_obj_pruned_log), permutations = 999)
##
## Permutation: free
## Number of permutations: 999
##
## Terms added sequentially (first to last)
##
## Df SumsOfSqs MeanSqs F.Model R2 Pr(>F)
## Field 3 1.640 0.5465 4.822 0.04733 0.001 ***
## Sample.type 2 10.161 5.0804 44.823 0.29328 0.001 ***
## Season 3 7.247 2.4156 21.312 0.20917 0.001 ***
## Sample.type:Season 6 3.357 0.5595 4.937 0.09690 0.001 ***
## Residuals 108 12.241 0.1133 0.35333
## Total 122 34.646 1.00000
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
PlotLibDist(Ps_obj_pruned_log)PlotReadHist(as(otu_table(Ps_obj_pruned_log), "matrix"), b.width = 0.1)notAllZero <- (rowSums(t(otu_table(Ps_obj_pruned_log))) > 0)
meanSdPlot(as(log2(t(otu_table(Ps_obj_pruned_log))[notAllZero, ] + 1), "matrix"))Rlog_ord <- ordinate(Ps_obj_pruned_log, "CAP", "bray", formula = Ps_obj_pruned_log ~ Field + Sample.type * Season)
plot_ordination(Ps_obj_pruned_log, Rlog_ord, type = "samples", color = "Sample.type", label = "Sample.name", shape = "Field") +
facet_wrap(~ Season)Ps_obj_pruned_CLR <- phyloseq_CLR(Ps_obj_pruned_min)## No. corrected values: 41013
sample_data(Ps_obj_pruned_CLR)$Library.size <- sample_sums(Ps_obj_pruned_CLR)
qplot(rowSums(otu_table(Ps_obj_pruned_CLR)), geom = "histogram") +
xlab("Library size")adonis(
vegdist(otu_table(Ps_obj_pruned_CLR), method = "euclidean") ~ Library.size,
data =
get_variable(Ps_obj_pruned_CLR),
permutations = 999
)##
## Call:
## adonis(formula = vegdist(otu_table(Ps_obj_pruned_CLR), method = "euclidean") ~ Library.size, data = get_variable(Ps_obj_pruned_CLR), permutations = 999)
##
## Permutation: free
## Number of permutations: 999
##
## Terms added sequentially (first to last)
##
## Df SumsOfSqs MeanSqs F.Model R2 Pr(>F)
## Library.size 1 25053 25053 1.2649 0.01035 0.197
## Residuals 121 2396557 19806 0.98965
## Total 122 2421610 1.00000
adonis(
vegdist(otu_table(Ps_obj_pruned_CLR), method = "euclidean") ~ Field + Sample.type * Season,
data =
get_variable(Ps_obj_pruned_CLR),
permutations = 999
)##
## Call:
## adonis(formula = vegdist(otu_table(Ps_obj_pruned_CLR), method = "euclidean") ~ Field + Sample.type * Season, data = get_variable(Ps_obj_pruned_CLR), permutations = 999)
##
## Permutation: free
## Number of permutations: 999
##
## Terms added sequentially (first to last)
##
## Df SumsOfSqs MeanSqs F.Model R2 Pr(>F)
## Field 3 94783 31594 3.236 0.03914 0.001 ***
## Sample.type 2 660984 330492 33.849 0.27295 0.001 ***
## Season 3 396633 132211 13.541 0.16379 0.001 ***
## Sample.type:Season 6 214726 35788 3.665 0.08867 0.001 ***
## Residuals 108 1054484 9764 0.43545
## Total 122 2421610 1.00000
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
PlotLibDist(Ps_obj_pruned_CLR)PlotReadHist(as(otu_table(Ps_obj_pruned_CLR), "matrix"), b.width = 0.1)notAllZero <- (rowSums(t(otu_table(Ps_obj_pruned_CLR))) > 0)
meanSdPlot(as(log2(t(otu_table(Ps_obj_pruned_CLR))[notAllZero, ] + 1), "matrix"))CLR_ord <- ordinate(Ps_obj_pruned_CLR, "RDA", formula = Ps_obj_pruned_CLR ~ Field + Sample.type * Season)
plot_ordination(Ps_obj_pruned_CLR, CLR_ord, type = "samples", color = "Sample.type", label = "Sample.name", shape = "Field") +
facet_wrap(~ Season)sessioninfo::session_info() %>%
details::details(
summary = 'Current session info',
open = TRUE
)─ Session info ─────────────────────────────────────────────────────────────────────────
setting value
version R version 4.0.3 (2020-10-10)
os Ubuntu 18.04.5 LTS
system x86_64, linux-gnu
ui X11
language (EN)
collate en_US.UTF-8
ctype en_US.UTF-8
tz Europe/Prague
date 2021-05-21
─ Packages ─────────────────────────────────────────────────────────────────────────────
package * version date lib source
ade4 1.7-16 2020-10-28 [1] CRAN (R 4.0.2)
affy 1.68.0 2020-10-27 [1] Bioconductor
affyio 1.60.0 2020-10-27 [1] Bioconductor
ape * 5.5 2021-04-25 [1] CRAN (R 4.0.3)
assertthat 0.2.1 2019-03-21 [1] CRAN (R 4.0.2)
backports 1.2.1 2020-12-09 [1] CRAN (R 4.0.2)
Biobase * 2.50.0 2020-10-27 [1] Bioconductor
BiocGenerics * 0.36.1 2021-04-16 [1] Bioconductor
BiocManager 1.30.15 2021-05-11 [1] CRAN (R 4.0.3)
biomformat 1.18.0 2020-10-27 [1] Bioconductor
Biostrings * 2.58.0 2020-10-27 [1] Bioconductor
bit 4.0.4 2020-08-04 [1] CRAN (R 4.0.2)
bit64 4.0.5 2020-08-30 [1] CRAN (R 4.0.2)
bitops 1.0-7 2021-04-24 [1] CRAN (R 4.0.3)
blob 1.2.1 2020-01-20 [1] CRAN (R 4.0.2)
broom * 0.7.6 2021-04-05 [1] CRAN (R 4.0.3)
cachem 1.0.5 2021-05-15 [1] CRAN (R 4.0.3)
caTools 1.18.2 2021-03-28 [1] CRAN (R 4.0.3)
cellranger 1.1.0 2016-07-27 [1] CRAN (R 4.0.2)
cli 2.5.0 2021-04-26 [1] CRAN (R 4.0.3)
clipr 0.7.1 2020-10-08 [1] CRAN (R 4.0.2)
cluster 2.1.2 2021-04-17 [1] CRAN (R 4.0.3)
codetools 0.2-18 2020-11-04 [1] CRAN (R 4.0.2)
colorspace 2.0-1 2021-05-04 [1] CRAN (R 4.0.3)
cowplot * 1.1.1 2020-12-30 [1] CRAN (R 4.0.2)
crayon 1.4.1 2021-02-08 [1] CRAN (R 4.0.3)
data.table 1.14.0 2021-02-21 [1] CRAN (R 4.0.3)
DBI 1.1.1 2021-01-15 [1] CRAN (R 4.0.3)
dbplyr 2.1.1 2021-04-06 [1] CRAN (R 4.0.3)
DECIPHER * 2.18.1 2020-10-29 [1] Bioconductor
desc 1.3.0 2021-03-05 [1] CRAN (R 4.0.3)
details 0.2.1 2020-01-12 [1] CRAN (R 4.0.2)
digest 0.6.27 2020-10-24 [1] CRAN (R 4.0.2)
docxtools * 0.2.2 2020-06-03 [1] CRAN (R 4.0.3)
doParallel * 1.0.16 2020-10-16 [1] CRAN (R 4.0.2)
dplyr * 1.0.6 2021-05-05 [1] CRAN (R 4.0.3)
ellipsis 0.3.2 2021-04-29 [1] CRAN (R 4.0.3)
evaluate 0.14 2019-05-28 [1] CRAN (R 4.0.2)
extrafont * 0.17 2014-12-08 [1] CRAN (R 4.0.2)
extrafontdb 1.0 2012-06-11 [1] CRAN (R 4.0.2)
fansi 0.4.2 2021-01-15 [1] CRAN (R 4.0.3)
farver 2.1.0 2021-02-28 [1] CRAN (R 4.0.3)
fastmap 1.1.0 2021-01-25 [1] CRAN (R 4.0.3)
fastmatch 1.1-0 2017-01-28 [1] CRAN (R 4.0.2)
forcats * 0.5.1 2021-01-27 [1] CRAN (R 4.0.3)
foreach * 1.5.1 2020-10-15 [1] CRAN (R 4.0.2)
fs 1.5.0 2020-07-31 [1] CRAN (R 4.0.2)
generics 0.1.0 2020-10-31 [1] CRAN (R 4.0.2)
ggplot2 * 3.3.3 2020-12-30 [1] CRAN (R 4.0.2)
ggsci * 2.9 2018-05-14 [1] CRAN (R 4.0.2)
glmnet * 4.1-1 2021-02-21 [1] CRAN (R 4.0.3)
glue 1.4.2 2020-08-27 [1] CRAN (R 4.0.2)
gplots 3.1.1 2020-11-28 [1] CRAN (R 4.0.2)
gridExtra * 2.3 2017-09-09 [1] CRAN (R 4.0.2)
gtable 0.3.0 2019-03-25 [1] CRAN (R 4.0.2)
gtools 3.8.2 2020-03-31 [1] CRAN (R 4.0.2)
haven 2.4.1 2021-04-23 [1] CRAN (R 4.0.3)
hexbin 1.28.2 2021-01-08 [1] CRAN (R 4.0.2)
highr 0.9 2021-04-16 [1] CRAN (R 4.0.3)
hms 1.1.0 2021-05-17 [1] CRAN (R 4.0.3)
htmltools 0.5.1.1 2021-01-22 [1] CRAN (R 4.0.3)
httr 1.4.2 2020-07-20 [1] CRAN (R 4.0.2)
igraph 1.2.6 2020-10-06 [1] CRAN (R 4.0.2)
IRanges * 2.24.1 2020-12-12 [1] Bioconductor
iterators * 1.0.13 2020-10-15 [1] CRAN (R 4.0.2)
jsonlite 1.7.2 2020-12-09 [1] CRAN (R 4.0.2)
kableExtra * 1.3.4 2021-02-20 [1] CRAN (R 4.0.3)
KernSmooth 2.23-20 2021-05-03 [1] CRAN (R 4.0.3)
knitr * 1.33 2021-04-24 [1] CRAN (R 4.0.3)
labeling 0.4.2 2020-10-20 [1] CRAN (R 4.0.2)
lattice * 0.20-44 2021-05-02 [1] CRAN (R 4.0.3)
lifecycle 1.0.0 2021-02-15 [1] CRAN (R 4.0.3)
limma * 3.46.0 2020-10-27 [1] Bioconductor
locfit 1.5-9.4 2020-03-25 [1] CRAN (R 4.0.2)
lubridate 1.7.10 2021-02-26 [1] CRAN (R 4.0.3)
magrittr * 2.0.1 2020-11-17 [1] CRAN (R 4.0.2)
MASS * 7.3-54 2021-05-03 [1] CRAN (R 4.0.3)
Matrix * 1.3-3 2021-05-04 [1] CRAN (R 4.0.3)
matrixStats * 0.58.0 2021-01-29 [1] CRAN (R 4.0.3)
memoise 2.0.0 2021-01-26 [1] CRAN (R 4.0.3)
metagenomeSeq * 1.32.0 2020-10-27 [1] Bioconductor
mgcv 1.8-35 2021-04-18 [1] CRAN (R 4.0.3)
modelr 0.1.8 2020-05-19 [1] CRAN (R 4.0.2)
multtest * 2.46.0 2020-10-27 [1] Bioconductor
munsell 0.5.0 2018-06-12 [1] CRAN (R 4.0.2)
my.tools * 0.5 2020-09-30 [1] local
NADA * 1.6-1.1 2020-03-22 [1] CRAN (R 4.0.2)
nlme 3.1-152 2021-02-04 [1] CRAN (R 4.0.3)
permute * 0.9-5 2019-03-12 [1] CRAN (R 4.0.2)
phangorn * 2.7.0 2021-05-03 [1] CRAN (R 4.0.3)
phyloseq * 1.34.0 2020-10-27 [1] Bioconductor
pillar 1.6.1 2021-05-16 [1] CRAN (R 4.0.3)
pkgconfig 2.0.3 2019-09-22 [1] CRAN (R 4.0.2)
plyr 1.8.6 2020-03-03 [1] CRAN (R 4.0.2)
png 0.1-7 2013-12-03 [1] CRAN (R 4.0.2)
preprocessCore 1.52.1 2021-01-08 [1] Bioconductor
prettyunits 1.1.1 2020-01-24 [1] CRAN (R 4.0.2)
progress 1.2.2 2019-05-16 [1] CRAN (R 4.0.2)
purrr * 0.3.4 2020-04-17 [1] CRAN (R 4.0.2)
quadprog 1.5-8 2019-11-20 [1] CRAN (R 4.0.2)
R6 2.5.0 2020-10-28 [1] CRAN (R 4.0.2)
ragg * 1.1.2 2021-03-17 [1] CRAN (R 4.0.3)
RColorBrewer * 1.1-2 2014-12-07 [1] CRAN (R 4.0.2)
Rcpp 1.0.6 2021-01-15 [1] CRAN (R 4.0.3)
readr * 1.4.0 2020-10-05 [1] CRAN (R 4.0.2)
readxl 1.3.1 2019-03-13 [1] CRAN (R 4.0.2)
reprex 2.0.0 2021-04-02 [1] CRAN (R 4.0.3)
reshape2 1.4.4 2020-04-09 [1] CRAN (R 4.0.2)
rhdf5 2.34.0 2020-10-27 [1] Bioconductor
rhdf5filters 1.2.1 2021-05-03 [1] Bioconductor
Rhdf5lib 1.12.1 2021-01-26 [1] Bioconductor
rlang 0.4.11 2021-04-30 [1] CRAN (R 4.0.3)
rmarkdown * 2.8 2021-05-07 [1] CRAN (R 4.0.3)
rprojroot 2.0.2 2020-11-15 [1] CRAN (R 4.0.2)
RSQLite * 2.2.7 2021-04-22 [1] CRAN (R 4.0.3)
rstudioapi 0.13 2020-11-12 [1] CRAN (R 4.0.2)
Rttf2pt1 1.3.8 2020-01-10 [1] CRAN (R 4.0.2)
rvest 1.0.0 2021-03-09 [1] CRAN (R 4.0.3)
S4Vectors * 0.28.1 2020-12-09 [1] Bioconductor
scales * 1.1.1 2020-05-11 [1] CRAN (R 4.0.2)
sessioninfo 1.1.1 2018-11-05 [1] CRAN (R 4.0.2)
shape 1.4.6 2021-05-19 [1] CRAN (R 4.0.3)
stringi 1.6.2 2021-05-17 [1] CRAN (R 4.0.3)
stringr * 1.4.0 2019-02-10 [1] CRAN (R 4.0.2)
survival * 3.2-11 2021-04-26 [1] CRAN (R 4.0.3)
svglite * 2.0.0 2021-02-20 [1] CRAN (R 4.0.3)
systemfonts 1.0.2 2021-05-11 [1] CRAN (R 4.0.3)
textshaping 0.3.4 2021-05-11 [1] CRAN (R 4.0.3)
tibble * 3.1.2 2021-05-16 [1] CRAN (R 4.0.3)
tidyr * 1.1.3 2021-03-03 [1] CRAN (R 4.0.3)
tidyselect 1.1.1 2021-04-30 [1] CRAN (R 4.0.3)
tidyverse * 1.3.1 2021-04-15 [1] CRAN (R 4.0.3)
truncnorm * 1.0-8 2018-02-27 [1] CRAN (R 4.0.2)
utf8 1.2.1 2021-03-12 [1] CRAN (R 4.0.3)
vctrs 0.3.8 2021-04-29 [1] CRAN (R 4.0.3)
vegan * 2.5-7 2020-11-28 [1] CRAN (R 4.0.3)
viridisLite 0.4.0 2021-04-13 [1] CRAN (R 4.0.3)
vsn * 3.58.0 2020-10-27 [1] Bioconductor
webshot 0.5.2 2019-11-22 [1] CRAN (R 4.0.2)
withr 2.4.2 2021-04-18 [1] CRAN (R 4.0.3)
Wrench 1.8.0 2020-10-27 [1] Bioconductor
xfun 0.23 2021-05-15 [1] CRAN (R 4.0.3)
xml2 1.3.2 2020-04-23 [1] CRAN (R 4.0.2)
XVector * 0.30.0 2020-10-27 [1] Bioconductor
yaml 2.2.1 2020-02-01 [1] CRAN (R 4.0.2)
zCompositions * 1.3.4 2020-03-04 [1] CRAN (R 4.0.2)
zlibbioc 1.36.0 2020-10-27 [1] Bioconductor
[1] /home/angel/R/library
[2] /usr/local/lib/R/site-library
[3] /usr/lib/R/site-library
[4] /usr/lib/R/libraryChen J, Chen L. GMPR: A novel normalization method for microbiome sequencing data. bioRxiv 2017:112565.
Fernandes AD, Macklaim JM, Linn TG et al. ANOVA-Like Differential Expression (ALDEx) Analysis for Mixed Population RNA-Seq. PLOS ONE 2013;8:e67019.
Paulson JN, Stine OC, Bravo HC et al. Differential abundance analysis for microbial marker-gene surveys. Nat Meth 2013;10:1200–2.